<!DOCTYPE html><html lang="zh-CN" data-theme="light"><head><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1"><title>GAE 算法 | 云玩家</title><meta name="keywords" content="机器学习,优化算法,强化学习,论文精读"><meta name="author" content="云玩家"><meta name="copyright" content="云玩家"><meta name="format-detection" content="telephone=no"><meta name="theme-color" content="#ffffff"><meta name="description" content="准备众所周知, 策略梯度有多种写法, 总的来说, 在保持策略梯度不变的情况下, 策略梯度可以写作$$  g&#x3D;\mathbb{E}\left[\sum_{t&#x3D;0}^{\infty}\Psi_t\nabla_\theta \log\pi_0(a_t\mid s_t)\right]\tag{1}$$  其中 $\Psi$ 可以是$$\begin{aligned}  1.;&amp;\sum\nolimi">
<meta property="og:type" content="article">
<meta property="og:title" content="GAE 算法">
<meta property="og:url" content="http://yunist.cn/ML/RL/primer/GAE/index.html">
<meta property="og:site_name" content="云玩家">
<meta property="og:description" content="准备众所周知, 策略梯度有多种写法, 总的来说, 在保持策略梯度不变的情况下, 策略梯度可以写作$$  g&#x3D;\mathbb{E}\left[\sum_{t&#x3D;0}^{\infty}\Psi_t\nabla_\theta \log\pi_0(a_t\mid s_t)\right]\tag{1}$$  其中 $\Psi$ 可以是$$\begin{aligned}  1.;&amp;\sum\nolimi">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="http://yunist.cn/ML/RL/primer/GAE/GAE.jpg">
<meta property="article:published_time" content="2020-08-05T08:27:58.000Z">
<meta property="article:modified_time" content="2021-08-14T04:39:34.293Z">
<meta property="article:author" content="云玩家">
<meta property="article:tag" content="机器学习">
<meta property="article:tag" content="优化算法">
<meta property="article:tag" content="强化学习">
<meta property="article:tag" content="论文精读">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="http://yunist.cn/ML/RL/primer/GAE/GAE.jpg"><link rel="shortcut icon" href="/img/favicon.ico"><link rel="canonical" href="http://yunist.cn/ML/RL/primer/GAE/"><link rel="preconnect" href="//cdn.jsdelivr.net"/><link rel="preconnect" href="//s4.cnzz.com"/><link rel="preconnect" href="//busuanzi.ibruce.info"/><link rel="stylesheet" href="/css/index.css"><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free/css/all.min.css" media="print" onload="this.media='all'"><script async="async" data-pjax="data-pjax" src="https://s4.cnzz.com/z_stat.php?id=1278869595&amp;web_id=1278869595"></script><script>const GLOBAL_CONFIG = { 
  root: '/',
  algolia: undefined,
  localSearch: {"path":"search.xml","languages":{"hits_empty":"找不到您查询的内容：${query}"}},
  translate: undefined,
  noticeOutdate: undefined,
  highlight: {"plugin":"highlighjs","highlightCopy":true,"highlightLang":true},
  copy: {
    success: '复制成功',
    error: '复制错误',
    noSupport: '浏览器不支持'
  },
  relativeDate: {
    homepage: false,
    post: false
  },
  runtime: '',
  date_suffix: {
    just: '刚刚',
    min: '分钟前',
    hour: '小时前',
    day: '天前',
    month: '个月前'
  },
  copyright: undefined,
  lightbox: 'fancybox',
  Snackbar: undefined,
  source: {
    jQuery: 'https://cdn.jsdelivr.net/npm/jquery@latest/dist/jquery.min.js',
    justifiedGallery: {
      js: 'https://cdn.jsdelivr.net/npm/justifiedGallery/dist/js/jquery.justifiedGallery.min.js',
      css: 'https://cdn.jsdelivr.net/npm/justifiedGallery/dist/css/justifiedGallery.min.css'
    },
    fancybox: {
      js: 'https://cdn.jsdelivr.net/npm/@fancyapps/fancybox@latest/dist/jquery.fancybox.min.js',
      css: 'https://cdn.jsdelivr.net/npm/@fancyapps/fancybox@latest/dist/jquery.fancybox.min.css'
    }
  },
  isPhotoFigcaption: false,
  islazyload: false,
  isanchor: false
}</script><script src="/js/yuntools.js"></script><script id="config-diff">var GLOBAL_CONFIG_SITE = { 
  isPost: true,
  isHome: false,
  isHighlightShrink: false,
  isToc: true,
  postUpdate: '2021-08-14 12:39:34'
}</script><noscript><style type="text/css">
  #nav {
    opacity: 1
  }
  .justified-gallery img {
    opacity: 1
  }

  #recent-posts time,
  #post-meta time {
    display: inline !important
  }
</style></noscript><script>(win=>{
    // Thin localStorage wrapper: every entry is stored as JSON of the form
    // { value, expiry } where expiry is an epoch-millisecond timestamp.
    win.saveToLocal = {
      /**
       * Store `value` under `key`, stamped to expire `ttl` days from now.
       * A `ttl` of exactly 0 stores nothing.
       */
      set: function setWithExpiry(key, value, ttl) {
        if (ttl === 0) return
        const now = new Date()
        const expiryDay = ttl * 86400000 // days -> milliseconds
        const item = {
          value: value,
          expiry: now.getTime() + expiryDay,
        }
        localStorage.setItem(key, JSON.stringify(item))
      },

      /**
       * Return the value stored under `key`, or undefined when the entry is
       * missing, expired (it is then removed), unreadable, or malformed.
       */
      get: function getWithExpiry(key) {
        // This getter is called while the inline bootstrap script is still
        // defining globals. An exception here (storage access refused, entry
        // that is not valid JSON, entry that parses to null) would abort that
        // script, so any such failure is reported as "no stored value".
        try {
          const itemStr = localStorage.getItem(key)

          if (!itemStr) {
            return undefined
          }
          const item = JSON.parse(itemStr)
          const now = new Date()

          if (now.getTime() > item.expiry) {
            localStorage.removeItem(key)
            return undefined
          }
          return item.value
        } catch (err) {
          return undefined
        }
      }
    }
  
    /**
     * Append an async <script src=url> to <head>.
     * Returns a Promise that resolves once the script reports it has loaded
     * and rejects with the error event if loading fails.
     */
    win.getScript = url => {
      return new Promise((resolve, reject) => {
        const el = document.createElement('script')

        // Shared by onload and onreadystatechange. When the element exposes a
        // readyState (presumably legacy IE — confirm), wait until it reads
        // 'loaded' or 'complete' before settling.
        const onSettled = function () {
          const state = this.readyState
          const stillLoading = state && state !== 'loaded' && state !== 'complete'
          if (stillLoading) return
          // Detach both handlers so the promise is settled only once.
          el.onload = el.onreadystatechange = null
          resolve()
        }

        el.src = url
        el.async = true
        el.onerror = reject
        el.onload = el.onreadystatechange = onSettled
        document.head.appendChild(el)
      })
    }
  
      // Mark <html> as data-theme="dark" and, when a theme-color meta tag
      // exists, set its content to the dark colour.
      win.activateDarkMode = function () {
        document.documentElement.setAttribute('data-theme', 'dark')
        const themeColorMeta = document.querySelector('meta[name="theme-color"]')
        if (themeColorMeta === null) return
        themeColorMeta.setAttribute('content', '#0d0d0d')
      }
      // Mark <html> as data-theme="light" and, when a theme-color meta tag
      // exists, set its content to the light colour.
      win.activateLightMode = function () {
        document.documentElement.setAttribute('data-theme', 'light')
        const themeColorMeta = document.querySelector('meta[name="theme-color"]')
        if (themeColorMeta === null) return
        themeColorMeta.setAttribute('content', '#ffffff')
      }
      // Re-apply the theme stored under 'theme', if it is one of the two
      // recognised values; anything else leaves the markup default in place.
      const storedTheme = saveToLocal.get('theme')
      switch (storedTheme) {
        case 'dark':
          activateDarkMode()
          break
        case 'light':
          activateLightMode()
          break
      }

      // Re-apply the stored sidebar state: 'hide' adds the hide-aside class,
      // any other stored value removes it, and no stored value changes nothing.
      const storedAside = saveToLocal.get('aside-status')
      if (storedAside !== undefined) {
        const rootClasses = document.documentElement.classList
        const method = storedAside === 'hide' ? 'add' : 'remove'
        rootClasses[method]('hide-aside')
      }
    })(window)</script><link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/cnyist/blog/css/font.css"><meta name="generator" content="Hexo 5.4.0"></head><body><div id="web_bg"></div><div id="sidebar"><div id="menu-mask"></div><div id="sidebar-menus"><div class="author-avatar"><img class="avatar-img" src="https://cdn.jsdelivr.net/gh/cnyist/blog/img/avatar.svg" onerror="onerror=null;src='/img/friend_404.gif'" alt="avatar"/></div><div class="site-data"><div class="data-item is-center"><div class="data-item-link"><a href="/archives/"><div class="headline">文章</div><div class="length-num">97</div></a></div></div><div class="data-item is-center"><div class="data-item-link"><a href="/tags/"><div class="headline">标签</div><div class="length-num">40</div></a></div></div><div class="data-item is-center"><div class="data-item-link"><a href="/categories/"><div class="headline">分类</div><div class="length-num">27</div></a></div></div></div><hr/><div class="menus_items"><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> 主页</span></a></div><div class="menus_item"><a class="site-page" href="/archives/"><i class="fa-fw fas fa-archive"></i><span> 时间轴</span></a></div><div class="menus_item"><a class="site-page" href="/tags/"><i class="fa-fw fas fa-tags"></i><span> 标签</span></a></div><div class="menus_item"><a class="site-page" href="/categories/"><i class="fa-fw fas fa-folder-open"></i><span> 分类</span></a></div><div class="menus_item"><a class="site-page" href="/link/"><i class="fa-fw fas fa-link"></i><span> 友链</span></a></div><div class="menus_item"><a class="site-page" href="/chat/"><i class="fa-fw fas fa-comments"></i><span> 闲聊室</span></a></div><div class="menus_item"><a class="site-page" href="/about/"><i class="fa-fw fas fa-user"></i><span> about</span></a></div><div class="menus_item"><a class="site-page" href="javascript:void(0);"><i class="fa-fw iconfont icon-Web"></i><span> 镜像站点</span><i class="fas fa-chevron-down 
expand"></i></a><ul class="menus_item_child"><li><a class="site-page" target="_blank" rel="noopener" href="https://yunist.gitee.io"><i class="fa-fw iconfont icon-gitee-fill-round"></i><span> Gitee Pages</span></a></li></ul></div></div></div></div><div class="post" id="body-wrap"><header class="post-bg" id="page-header" style="background-image: url('/ML/RL/primer/GAE/GAE.jpg')"><nav id="nav"><span id="blog_name"><a id="site-name" href="/">云玩家</a></span><div id="menus"><div id="search-button"><a class="site-page social-icon search"><i class="fas fa-search fa-fw"></i><span> 搜索</span></a></div><div class="menus_items"><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> 主页</span></a></div><div class="menus_item"><a class="site-page" href="/archives/"><i class="fa-fw fas fa-archive"></i><span> 时间轴</span></a></div><div class="menus_item"><a class="site-page" href="/tags/"><i class="fa-fw fas fa-tags"></i><span> 标签</span></a></div><div class="menus_item"><a class="site-page" href="/categories/"><i class="fa-fw fas fa-folder-open"></i><span> 分类</span></a></div><div class="menus_item"><a class="site-page" href="/link/"><i class="fa-fw fas fa-link"></i><span> 友链</span></a></div><div class="menus_item"><a class="site-page" href="/chat/"><i class="fa-fw fas fa-comments"></i><span> 闲聊室</span></a></div><div class="menus_item"><a class="site-page" href="/about/"><i class="fa-fw fas fa-user"></i><span> about</span></a></div><div class="menus_item"><a class="site-page" href="javascript:void(0);"><i class="fa-fw iconfont icon-Web"></i><span> 镜像站点</span><i class="fas fa-chevron-down expand"></i></a><ul class="menus_item_child"><li><a class="site-page" target="_blank" rel="noopener" href="https://yunist.gitee.io"><i class="fa-fw iconfont icon-gitee-fill-round"></i><span> Gitee Pages</span></a></li></ul></div></div><div id="toggle-menu"><a class="site-page"><i class="fas fa-bars fa-fw"></i></a></div></div></nav><div id="post-info"><h1 
class="post-title">GAE 算法</h1><div id="post-meta"><div class="meta-firstline"><span class="post-meta-date"><i class="far fa-calendar-alt fa-fw post-meta-icon"></i><span class="post-meta-label">发表于</span><time class="post-meta-date-created" datetime="2020-08-05T08:27:58.000Z" title="发表于 2020-08-05 16:27:58">2020-08-05</time><span class="post-meta-separator">|</span><i class="fas fa-history fa-fw post-meta-icon"></i><span class="post-meta-label">更新于</span><time class="post-meta-date-updated" datetime="2021-08-14T04:39:34.293Z" title="更新于 2021-08-14 12:39:34">2021-08-14</time></span><span class="post-meta-categories"><span class="post-meta-separator">|</span><i class="fas fa-inbox fa-fw post-meta-icon"></i><a class="post-meta-categories" href="/categories/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/">机器学习</a><i class="fas fa-angle-right post-meta-separator"></i><i class="fas fa-inbox fa-fw post-meta-icon"></i><a class="post-meta-categories" href="/categories/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0/">强化学习</a><i class="fas fa-angle-right post-meta-separator"></i><i class="fas fa-inbox fa-fw post-meta-icon"></i><a class="post-meta-categories" href="/categories/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0/%E5%85%A5%E9%97%A8/">入门</a></span></div><div class="meta-secondline"><span class="post-meta-separator">|</span><span class="post-meta-pv-cv"><i class="far fa-eye fa-fw post-meta-icon"></i><span class="post-meta-label">阅读量:</span><span id="busuanzi_value_page_pv"></span></span><span class="post-meta-separator">|</span><span class="post-meta-commentcount"><i class="far fa-comments fa-fw post-meta-icon"></i><span class="post-meta-label">评论数:</span><a href="/ML/RL/primer/GAE/#post-comment"><span id="twikoo-count"></span></a></span></div></div></div></header><main class="layout" id="content-inner"><div id="post"><article class="post-content" id="article-container"><h1 id="准备"><a href="#准备" class="headerlink" 
title="准备"></a>准备</h1><p><del>众所周知,</del> 策略梯度有多种写法, 总的来说, 在保持策略梯度不变的情况下, 策略梯度可以写作<br>$$<br>  g=\mathbb{E}\left[\sum_{t=0}^{\infty}\Psi_t\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]\tag{1}<br>$$<br>  其中 $\Psi$ 可以是<br>$$<br>\begin{aligned}<br>  1.\;&amp;\sum\nolimits_{t=0}^\infty r_t&amp;轨迹的总回报\\<br>  2.\;&amp;\sum\nolimits_{t'=t}^\infty r_{t'}&amp;动作后轨迹的总回报\\<br>  3.\;&amp;\sum\nolimits_{t'=t}^\infty r_{t'}-b(s_t)&amp;基线形式\\<br>  4.\;&amp;Q^{\pi}(s_t,a_t)&amp;状态-动作价值函数\\<br>  5.\;&amp;A^{\pi}(s_t,a_t)&amp;优势函数\\<br>  6.\;&amp;r_t+V^{\pi}(s_{t+1})-V^\pi(s_t)&amp;\text{TD}\,残差<br>  \end{aligned}<br>$$<br>  其中<br>$$<br>  \begin{align}<br>  V^\pi(s_t):=\mathbb{E}_{\substack{s_{t+1:\infty},\\a_{t:\infty}}}\left[\sum_{l = 0}^\infty r_{t+l}\right]\qquad Q^\pi(s_t, a_t):=\mathbb{E}_{\substack{s_{t+1:\infty},\\a_{t+1:\infty}}}\left[\sum_{l = 0}^\infty r_{t+l}\right]\tag{2}\\<br>  A^{\pi}(s_t,a_t)=Q^{\pi}(s_t,a_t)-V^{\pi}(s_t)\tag{3}<br>  \end{align}<br>$$<br>  这里冒号记号 $a:b$ 指的是 $(a,a+1,\dots,b)$ 这样的序列, $\mathbb{E}$ 的下标枚举了要被积分的变量. 前面 $5$ 项的推导或者推导资料在<a href="https://yunist.cn/ML/RL/primer/opt_param/">参数优化</a>中都有说明, 而 $\text{TD}$ 残差其实是优势函数的一种无偏估计.</p>
<p> 其中令 $\Psi_t=A^\pi(s_t, a_t)$ (优势函数) 的选择有几乎最小的方差. 这一点可以从策略梯度的角度直观地解释: 策略梯度中的每一步都会增加 “高于平均水平的动作” 的概率, 减少 “低于平均水平的动作” 的概率, 而优势函数 $A^{\pi}(s_t,a_t)=Q^{\pi}(s_t,a_t)-V^{\pi}(s_t)$ 恰好衡量了动作相对平均水平的好坏, 当动作高于平均水平时, 优势函数会取正数, 从而增加其概率; 当动作低于平均水平时, 优势函数会取负数, 从而降低其概率.</p>
<p> 我们利用一个参数 $\gamma$ 来降低回报对延迟效应的反应的权重 (即减少未来回报的影响) 来减少方差, 代价是引入偏差. 这个参数对应于有折损的 $\text{MDPs}$ 公式中的折扣因子, 但是我们将其当做无折损问题中一个减少方差的参数. 这些有折损的公式可以表示为<br>$$<br>  \begin{align}<br>  V^{\pi, \gamma}(s_t):=\mathbb{E}_{\substack{s_{t+1:\infty},\\a_{t:\infty}}}\left[\sum_{l = 0}^\infty \gamma^l r_{t+l}\right]\qquad Q^{\pi, \gamma}(s_t, a_t):=\mathbb{E}_{\substack{s_{t+1:\infty},\\a_{t+1:\infty}}}\left[\sum_{l = 0}^\infty \gamma^l r_{t+l}\right]\tag{4}\\<br>  A^{\pi, \gamma}(s_t,a_t)=Q^{\pi, \gamma}(s_t,a_t)-V^{\pi, \gamma}(s_t)\tag{5}<br>  \end{align}<br>$$<br>  梯度的有折损近似可以表示为<br>$$<br>  g^\gamma:=\mathbb{E}_{\substack{s_{0:\infty},\\a_{0:\infty}}}\left[\sum_{t=0}^{\infty}A^{\pi, \gamma}(s_t,a_t)\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]\tag{6}<br>$$<br>  由于优势函数是未知的, 所以我们需要对它进行估计. 在实践中, 往往只能学习到 $A^{\pi,\gamma}$ 的一个有偏估计 (但没有那么偏) .</p>
<p> 我们引入关于 $A^{\pi,\gamma}$ 的一个估计, 并且这个估计是无偏的. 考虑一个与整个轨迹有关的优势函数的估计 $\hat{A}_t(s_{0:\infty},a_{0:\infty})$ . </p>
<p>  我们定义: <strong>一个估计 $\hat{A}_t$ 是 $\gamma\text{-just}$ 的当且仅当</strong><br>$$<br>  \mathbb{E}_{\substack{s_{0:\infty},\\a_{0:\infty}}}\left[\hat{A}_t(s_{0:\infty},a_{0:\infty})\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]=\mathbb{E}_{\substack{s_{0:\infty},\\a_{0:\infty}}}\left[A^{\pi, \gamma}(s_t,a_t)\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]\tag{7}<br>$$<br>  因此如果对于所有的 $t$ 来说 $\hat{A}_t$ 都是 $\gamma\text{-just}$ 的话, 就有<br>$$<br>  \mathbb{E}_{\substack{s_{0:\infty},\\a_{0:\infty}}}\left[\sum_{t=0}^{\infty}\hat{A}_t(s_{0:\infty},a_{0:\infty})\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]=g^\gamma\tag{8}<br>$$<br>  一个令 $\hat{A}_t$ 为 $\gamma\text{-just}$ 的充分条件是 $\hat{A}_t$ 可以被分解为两个函数 $Q_t$ 和 $b_t$ 之间的差. 并且 $Q_t$ 可以依赖于轨迹中的任意变量, 但必须是 $\gamma$-折扣 的 $Q$ 函数 (状态-动作价值函数) 的无偏估计 (如果要求是状态-动作价值函数的估计, 那么由于马尔科夫性, 在时间 $t$ 之前的变量都对状态-动作价值函数的值没有影响, 因此 $Q_t$ 只被时间 $t$ 以及 $t$ 以后的变量决定), 而 $b_t$ 是先于 $a_t$ 被采样的动作和状态的任意函数. 即<strong>如果 $\hat{A}_t$ 可以被写成 $\hat{A}_t(s_{0:\infty},a_{0:\infty})=Q_t(s_{t:\infty},a_{t:\infty})-b_t(s_{0:t},a_{0:t-1})$ 的形式, 并且对于任意的 $(s_t,a_t)$ 有 $\mathbb{E}_{s_{t+1:\infty},a_{t+1:\infty}\mid s_t,a_t}[Q_t(s_{t:\infty},a_{t:\infty})]=Q^{\pi, \gamma}(s_t,a_t)$ , 那么 $\hat{A}$ 就是 $\gamma\text{-just}$ 的.</strong></p>
<p>  证明:<br>$$<br>  \begin{align}<br>  &amp;\mathbb{E}_{\substack{s_{0:\infty},\\a_{0:\infty}}}\left[\hat{A}_t(s_{0:\infty},a_{0:\infty})\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]\\<br>  =&amp;\mathbb{E}_{\substack{s_{0:\infty},\\a_{0:\infty}}}\left[Q_t(s_{t:\infty},a_{t:\infty})\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]-\mathbb{E}_{\substack{s_{0:\infty},\\a_{0:\infty}}}\left[b_t(s_{0:t},a_{0:t-1})\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]<br>  \end{align}<br>$$<br>  其中<br>$$<br>  \begin{align}<br>  &amp;\mathbb{E}_{\substack{s_{0:\infty},\\a_{0:\infty}}}\left[Q_t(s_{t:\infty},a_{t:\infty})\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]\\<br>  =&amp;\mathbb{E}_{\substack{s_{0:t},\\a_{0:t}}}\left[\mathbb{E}_{\substack{s_{t+1:\infty},\\a_{t+1:\infty}}}\left[Q^{\pi,\gamma}(s_t,a_t)\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]\right]\\<br>  =&amp;\mathbb{E}_{\substack{s_{0:t},\\a_{0:t}}}\left[\mathbb{E}_{\substack{s_{t+1:\infty},\\a_{t+1:\infty}}}\left[A^{\pi, \gamma}(s_t,a_t)\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]\right]\\<br>  =&amp;\mathbb{E}_{\substack{s_{0:\infty},\\a_{0:\infty}}}\left[A^{\pi, \gamma}(s_t,a_t)\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]<br>  \end{align}<br>$$<br>  而<br>$$<br>  \begin{align}<br>  &amp;\mathbb{E}_{\substack{s_{0:\infty},\\a_{0:\infty}}}\left[b_t(s_{0:t},a_{0:t-1})\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]\\<br>  =&amp;\mathbb{E}_{\substack{s_{0:t},\\a_{0:t-1}}}\left[\mathbb{E}_{\substack{s_{t+1:\infty},\\a_{t:\infty}}}\left[b_t(s_{0:t},a_{0:t-1})\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]\right]\\<br>  =&amp;\mathbb{E}_{\substack{s_{0:t},\\a_{0:t-1}}}\left[\mathbb{E}_{\substack{s_{t+1:\infty},\\a_{t:\infty}}}\left[\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]b_t(s_{0:t},a_{0:t-1})\right]<br>  \end{align}<br>$$<br>  根据 $\text{EGLP}$ 定理 (见<a href="https://yunist.cn/ML/RL/primer/opt_param/">参数优化</a>) 有<br>$$<br>  
\mathbb{E}_{\substack{s_{t+1:\infty},\\a_{t:\infty}}}\left[\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]=0<br>$$<br>  因此<br>$$<br>  \mathbb{E}_{\substack{s_{0:\infty},\\a_{0:\infty}}}\left[b_t(s_{0:t},a_{0:t-1})\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]=0<br>$$<br>  所以有<br>$$<br>  \mathbb{E}_{\substack{s_{0:\infty},\\a_{0:\infty}}}\left[\hat{A}_t(s_{0:\infty},a_{0:\infty})\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]=\mathbb{E}_{\substack{s_{0:\infty},\\a_{0:\infty}}}\left[A^{\pi, \gamma}(s_t,a_t)\nabla_\theta \log\pi_\theta(a_t\mid s_t)\right]<br>$$<br>  也就是说 $\hat{A}$ 是 $\gamma\text{-just}$ 的.</p>
<p>  容易验证, 下列估计表达式 $\hat{A}_t$ 也是 $\gamma\text{-just}$ 的:<br>$$<br>  \begin{align}<br>  &amp;\bullet\sum\nolimits_{l=0}^\infty \gamma^l r_{t+l}&amp;\qquad\qquad\qquad\qquad\bullet&amp; A^{\pi,\gamma}(s_t,a_t)\\<br>  &amp;\bullet Q^{\pi,\gamma}(s_t,a_t)&amp;\bullet&amp; r_t+\gamma V^{\pi,\gamma}(s_{t+1})-V^{\pi,\gamma}(s_t)<br>  \end{align}<br>$$</p>
<h1 id="优势函数的估计"><a href="#优势函数的估计" class="headerlink" title="优势函数的估计"></a>优势函数的估计</h1><p> 令 $\delta_t^V=r_t+\gamma V(s_{t+1})-V(s_t)$ 为 $V$ 的 $\gamma$ 折扣 $\text{TD}$ 残差. 注意到 $\delta_t^V$ 可以被看做是动作 $a_t$ 的优势. 事实上, 如果我们有确切的价值函数 $V=V^{\pi,\gamma}$ , 那么 $\delta_t^{V^{\pi,\gamma}}$ 会是一个无偏的,  $\gamma\text{-just}$ 的优势估计.<br>$$<br>  \begin{align}<br>  \mathbb{E}_{s_{t+1}}\left[\delta_t^{V^{\pi,\gamma}}\right]&amp;=\mathbb{E}_{s_{t+1}}\left[r_t+\gamma V^{\pi,\gamma}(s_{t+1})-V^{\pi,\gamma}(s_t)\right]\\<br>  &amp;=\mathbb{E}_{s_{t+1}}\left[Q^{\pi,\gamma}(s_t,a_t)-V^{\pi,\gamma}(s_t)\right]\\<br>  &amp;=A^{\pi,\gamma}(s_t,a_t)<br>  \end{align}<br>$$<br>  然而, 也只有当 $V=V^{\pi,\gamma}$ 时这个优势估计才是 $\gamma\text{-just}$ 的.</p>
<p> 接着, 让我们考虑 $k$ 项这样的 $\delta$ 相加的情况, 我们将其记为 $\hat{A}_t^{(k)}$<br>$$<br>  \begin{align}<br>  &amp;\hat{A}_t^{(1)}:=\delta_t^V&amp;=&amp;-V(s_t)+r_t+\gamma V(s_{t+1})\tag{11}\\<br>  &amp;\hat{A}_t^{(2)}:=\delta_t^V+\gamma\delta_{t+1}^V&amp;=&amp;-V(s_t)+r_t+\gamma r_{t+1}+\gamma^2 V(s_{t+2})\tag{12}\\<br>  &amp;\hat{A}_t^{(3)}:=\delta_t^V+\gamma\delta_{t+1}^V+\gamma^2\delta_{t+2}^V&amp;=&amp;-V(s_t)+r_t+\gamma r_{t+1}+\gamma^2 r_{t+2}+\gamma^3 V(s_{t+3})\tag{13}<br>  \end{align}<br>$$</p>
<p>$$<br>  \hat{A}_t^{(k)}:=\sum_{l=0}^{k-1}\gamma^l\delta_{t+l}^V=-V(s_t)+r_t+\gamma r_{t+1}+\dots+\gamma^{k-1}r_{t+k-1}+\gamma^kV(s_{t+k})\tag{14}<br>$$</p>
<p>  容易得出当且仅当 $V=V^{\pi,\gamma}$ 时 $\hat{A}_t^{(k)}$ 是 $\gamma\text{-just}$ 的. 注意到当 $k\to \infty$ 时, 由于 $\gamma^kV(s_{t+k})$ 项急剧减小 (当 $V\not= V^{\pi,\gamma}$ 时, 该项会产生偏差), 因此其偏差也会减少,  而 $-V(s_t)$ 项是不会产生偏差的 (因为这是基线). 令 $k\to \infty$ , 我们得到<br>$$<br>  \hat{A}_t^{(\infty)}=\sum_{l=0}^{\infty}\gamma^l\delta_{t+l}^V=-V(s_t)+\sum_{l=0}^\infty\gamma^lr_{t+l}\tag{15}<br>$$<br>  仅仅是回报减去价值函数基线.</p>
<p> 广义优势估计 $(\text{generalized advantage estimator})$ $\mathrm{GAE}(\gamma,\lambda)$ 被定义为这些 $k$ 步估计量的指数加权平均:<br>$$<br>  \begin{align}<br>  \hat{A}_t^{\mathrm{GAE}(\gamma,\lambda)}&amp;:=(1-\lambda)\left(\hat{A}_t^{(1)}+\lambda\hat{A}_t^{(2)}+\lambda^2\hat{A}_t^{(3)}+\cdots\right)\\<br>  &amp;=(1-\lambda)\left(\delta_t^V+\lambda(\delta_t^V+\gamma\delta_{t+1}^V)+\lambda^2(\delta_t^V+\gamma\delta_{t+1}^V+\gamma^2\delta_{t+2}^V)+\cdots\right)\\<br>  &amp;=(1-\lambda)\left(\delta_t^V(1+\lambda+\lambda^2+\cdots)+\gamma\delta_{t+1}^V(\lambda+\lambda^2+\lambda^3+\cdots)\right.\\<br>  &amp;\qquad\left.+\gamma^2\delta_{t+2}^V(\lambda^2+\lambda^3+\lambda^4+\cdots)+\cdots\right)\\<br>  &amp;=(1-\lambda)\left(\delta_t^V\left(\frac{1}{1-\lambda}\right)+\gamma\delta_{t+1}^V\left(\frac{\lambda}{1-\lambda}\right)+\gamma^2\delta_{t+2}^V\left(\frac{\lambda^2}{1-\lambda}\right)+\cdots\right)\\<br>  &amp;=\sum_{l=0}^\infty(\gamma\lambda)^l\delta_{t+l}^V\tag{16}<br>  \end{align}<br>$$<br>  这个公式有两个特殊的情况即 $\lambda=0$ 和 $\lambda=1$ ,<br>$$<br>\begin{align}<br>  \mathrm{GAE}(\gamma, 0):&amp;\hat{A}_t:=\delta_t=r_t+\gamma V(s_{t+1})-V(s_t)\tag{17}\\<br>  \mathrm{GAE}(\gamma,1):&amp;\hat{A}_t:=\sum_{l=0}^\infty \gamma^l\delta_{t+l}=\sum_{l=0}^\infty<br>  \gamma^lr_{t+l}-V(s_t)\tag{18}<br>  \end{align}<br>$$<br>&nbsp;$\mathrm{GAE}(\gamma,1)$ 是 $\gamma\text{-just}$ 的, 不论 $V$ 的精度如何, 但由于其表达式中有多项和而导致其有高方差. 而 $\mathrm{GAE}(\gamma,0)$ 是 $\gamma\text{-just}$ 的当且仅当 $V=V^{\pi,\gamma}$ , 如果并非如此, 那么就会包含偏差, 但其往往具有更低的方差. 当 $0&lt;\lambda&lt;1$ 时我们得到更普遍的优势估计, 并且通过调整参数 $\lambda$ , 我们可以在偏差和方差之间取得一个平衡.</p>
<p> 我们引入了一个包含两个参数 $\gamma$ 和 $\lambda$ 的优势估计, 这两个参数都会影响偏差和方差之间的平衡. 但是, 它们的作用以及对平衡的影响有本质上的不同. $\gamma$ 决定了价值函数 $V^{\pi,\gamma}$ 的最大值, 而 $\lambda$ 对其并没有影响. 并且无论价值函数是否是正确的, 只要 $\gamma&lt;1$ 那么对于梯度 $g$ 来说就一定会有偏差 (因为已经使用了有折损回报, 而我们要优化的是无折损回报, 而 $\gamma$ 可以看做是影响方差与偏差的一个参数) . 而当 $\lambda&lt;1$ 时只有错误的价值函数才会带来偏差. 因此我们往往会发现 $\lambda$ 的最优值往往比 $\gamma$ 的最优值要低得多, 这有可能是因为当获取到一个足够准确的价值函数后 $\lambda$ 引入的误差要比 $\gamma$ 小得多.</p>
<p> 使用广义优势估计, 我们可以构建一个 $g^\gamma$ 的有偏估计<br>$$<br>  g^\gamma\approx\mathbb{E}\left[\sum_{t=0}^{\infty}\nabla_\theta\log\pi_{\theta}(a_t\mid s_t)\hat{A}_t^{\mathrm{GAE}(\gamma,\lambda)}\right]=\mathbb{E}\left[\sum_{t=0}^{\infty}\nabla_\theta\log\pi_{\theta}(a_t\mid s_t)\sum_{l=0}^\infty (\gamma\lambda)^l\delta_{t+l}^V\right]\tag{19}<br>$$<br>  当 $\lambda=1$ 时就是无偏的了.</p>
<h1 id="对回报变形后的解释"><a href="#对回报变形后的解释" class="headerlink" title="对回报变形后的解释"></a>对回报变形后的解释</h1><p> 令 $\Phi \colon\mathcal{S}\to \mathbb{R}$ 为在状态空间上的任意函数. 考虑一个变形后的回报 $\tilde{r}$<br>$$<br>  \tilde{r}(s,a,s')=r(s,a,s')+\gamma\Phi(s')-\Phi(s)\tag{20}<br>$$<br>   容易证明, 对于变形后的回报, 其有折损和为<br>$$<br>  \sum_{l=0}^\infty \gamma^l \tilde{r}(s_{t+l},a_{t+l},s_{t+l+1})=\sum_{l=0}^\infty \gamma^l r(s_{t+l},a_{t+l},s_{t+l+1})-\Phi(s_t)\tag{21}<br>$$<br>  类似的, 我们定义<br>$$<br>  \begin{align}<br>  &amp;\tilde{Q}^{\pi,\gamma}(s,a)=Q^{\pi,\gamma}(s,a)-\Phi(s)\tag{22}\\<br>  &amp;\tilde{V}^{\pi,\gamma}(s)=V^{\pi,\gamma}(s)-\Phi(s)\tag{23}\\<br>  &amp;\tilde{A}^{\pi,\gamma}(s,a)=(Q^{\pi,\gamma}(s,a)-\Phi(s))-(V^{\pi,\gamma}(s)-\Phi(s))=A^{\pi,\gamma}(s,a)\tag{24}<br>  \end{align}<br>$$<br>  分别为变形后的状态-动作价值函数, 价值函数, 优势函数.</p>
<p> 注意到如果 $\Phi$ 恰好等于价值函数 $V^{\pi,\gamma}$ , 那么 $\tilde{V}^{\pi,\gamma}(s)$ 对于任何状态都等于零.</p>
<p> 同样注意到使用变形后的回报对最大化有折损回报 $\sum_{t=0}^\infty \gamma^tr(s_t,a_t,s_{t+1})$ 的策略梯度是没有影响的. 但本文的目的其实是要最大化无折损回报, 而 $\gamma$ 是一个用来降低方差的参数.</p>
<p> 引出回报变形后, 让我们考虑将其使用在梯度估计上. 注意到 $\tilde{r}$ 的形式与 $\delta^V$ 完全相同. 如果我们令 $\Phi=V$ , 那么就有<br>$$<br>  \sum_{l=0}^\infty(\gamma\lambda)^l\tilde{r}(s_{t+l},a_{t+l},s_{t+l+1})=\sum_{l=0}^\infty(\gamma\lambda)^l\delta_{t+l}^V=\hat{A}_t^{\mathrm{GAE}(\gamma,\lambda)}\tag{25}<br>$$<br>  因此可以将 $\delta_{t}^V$ 理解为变形后的回报, 而 $\gamma\lambda$ 其实就是折损.</p>
<p> 还有另一种解释, 记响应函数 $\chi$ 为<br>$$<br>\chi(l;s_t,a_t)=\mathbb{E}[r_{t+l}\mid s_t,a_t]-\mathbb{E}[r_{t+l}\mid s_t]\tag{26}<br>$$<br>  注意到 $A^{\pi,\gamma}(s,a)=\sum_{l=0}^\infty \gamma^l\chi(l;s,a)$ . 然后重新回忆我们用折扣因子 $\gamma$ 和 $A^{\pi,\gamma}$ 来估计的策略梯度 (式 $(6)$) 中有如下形式<br>$$<br>  \nabla_\theta \log\pi_\theta(a_t\mid s_t)A^{\pi, \gamma}(s_t,a_t)=\nabla_\theta \log\pi_\theta(a_t\mid s_t)\sum_{l=0}^\infty \gamma^l\chi(l;s_t,a_t)\tag{27}<br>$$<br>  使用折扣 $\gamma&lt;1$ 相当于急剧减少当 $l\gg 1/(1-\gamma)$ 的项 (因为 $\gamma^l$ 会变得很小) . 因此这些项带来的偏差会随着 $l$ 的增加而变少, 也就是说, 在大约 $1/(1-\gamma)$ 步后行动对回报的影响看起来像是被 “遗忘” 了.</p>
<p> 如果我们使用变形后的回报 $\tilde{r}$ 并且有 $\Phi=V^{\pi,\gamma}$ , 不难得到当 $l&gt;0$ 时有 $\mathbb{E}[\tilde{r}_{t+l}\mid s_t,a_t]=\mathbb{E}[\tilde{r}_{t+l}\mid s_t]=0$ (由于 $t$ 之后时刻采取的动作的概率完全依赖于那时候的状态 $s$ , 而 $t$ 时刻的动作已经给定为 $a_t$) . 因此只有 $l=0$ 的项才是非零的. 因此变形回报将依赖长时间的梯度估计变成了瞬时的. 当然我们往往难以得到非常好的估计, 但这已经能够给出式 $(16)$ 的一个很好的解释: 变形后的回报可以通过拟合 $V^{\pi,\gamma}$ 来减少响应函数起作用的范围, 通过引入一个更陡的折损 $\gamma\lambda$ 来减少由回报延迟 (即梯度估计中有很多项回报的和) 引起的噪声 (这是产生方差的原因), 也就是当 $l\gg 1/(1-\gamma\lambda)$ 时忽略项 $\nabla_\theta \log\pi_\theta(a_t\mid s_t)\delta^{V}_{t+l}$ .</p>
<h1 id="后记"><a href="#后记" class="headerlink" title="后记"></a>后记</h1><p> 这篇文章可能可以算是论文 <a target="_blank" rel="noopener" href="https://arxiv.org/pdf/1506.02438.pdf">High-Dimensional Continuous Control Using Generalized Advantage Estimation</a> 的一个翻译, 但是其中增添了一些我自己的理解并且有些东西换了一些表述. 这篇文章只有论文前半部分的内容, 后面则是讲如何拟合价值函数以及一些探究, 有兴趣的可以看原论文. 其实原论文后面这段解释我看着怪怪的… 虽然说不出什么问题吧… 但是总感觉没什么必要… 因为前面的介绍以及推导已经解释了 $\text{GAE}$ 是如何起作用的… 还有我本人英语非常菜, 借助了翻译才一点点看下来… 如果哪个地方理解不对请指出, <del>还有大佬们请轻喷</del>.</p>
</article><div class="post-copyright"><div class="post-copyright__author"><span class="post-copyright-meta">文章作者: </span><span class="post-copyright-info"><a href="mailto:undefined">云玩家</a></span></div><div class="post-copyright__type"><span class="post-copyright-meta">文章链接: </span><span class="post-copyright-info"><a href="http://yunist.cn/ML/RL/primer/GAE/">http://yunist.cn/ML/RL/primer/GAE/</a></span></div><div class="post-copyright__notice"><span class="post-copyright-meta">版权声明: </span><span class="post-copyright-info">本博客所有文章除特别声明外，均采用 <a href="https://creativecommons.org/licenses/by-nc-sa/4.0/" target="_blank">CC BY-NC-SA 4.0</a> 许可协议。转载请注明来自 <a href="http://yunist.cn" target="_blank">云玩家</a>！</span></div></div><div class="tag_share"><div class="post-meta__tag-list"><a class="post-meta__tags" href="/tags/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/">机器学习</a><a class="post-meta__tags" href="/tags/%E4%BC%98%E5%8C%96%E7%AE%97%E6%B3%95/">优化算法</a><a class="post-meta__tags" href="/tags/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0/">强化学习</a><a class="post-meta__tags" href="/tags/%E8%AE%BA%E6%96%87%E7%B2%BE%E8%AF%BB/">论文精读</a></div><div class="post_share"><div class="social-share" data-image="/ML/RL/primer/GAE/GAE.jpg" data-sites="facebook,twitter,wechat,weibo,qq"></div><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/social-share.js/dist/css/share.min.css" media="print" onload="this.media='all'"><script src="https://cdn.jsdelivr.net/npm/social-share.js/dist/js/social-share.min.js" defer></script></div></div><nav class="pagination-post" id="pagination"><div class="prev-post pull-left"><!-- - var pagination_cover = prev.cover === false ? 
prev.randomcover : prev.cover--><a href="/python/jupyter/jupyter_server/"><img class="prev-cover" src="https://cdn.jsdelivr.net/gh/cnyist/blog/python/jupyter/jupyter_server/jupyter_server.png" onerror="onerror=null;src='/img/404.jpg'" alt="cover of previous post"><div class="pagination-info"><div class="label">上一篇</div><div class="prev_info">Anaconda + PM2 部署 Jupyter 在服务器</div></div></a></div><!-- - var pagination_cover = next.cover == false ? next.randomcover : next.cover--><div class="next-post pull-right"><a href="/scribble/proxy_web/"><img class="next-cover" src="https://cdn.jsdelivr.net/gh/cnyist/blog/scribble/proxy_web/proxy_web.png" onerror="onerror=null;src='/img/404.jpg'" alt="cover of next post"><div class="pagination-info"><div class="label">下一篇</div><div class="next_info">零成本搭建网页代理</div></div></a></div></nav><div class="relatedPosts"><div class="headline"><i class="fas fa-thumbs-up fa-fw"></i><span> 相关推荐</span></div><div class="relatedPosts-list"><div><a href="/ML/RL/papers_read/Model-Free_RL/Deep_Q-Learning/1/" title="DQN 算法"><img class="cover" src="https://cdn.jsdelivr.net/gh/cnyist/blog/ML/RL/papers_read/Model-Free_RL/Deep_Q-Learning/1/1.png" alt="cover"><div class="content is-center"><div class="date"><i class="far fa-calendar-alt fa-fw"></i> 2021-02-13</div><div class="title">DQN 算法</div></div></a></div><div><a href="/ML/optimizer/numerical_analytical_gradient/" title="数值梯度与解析梯度"><img class="cover" src="https://cdn.jsdelivr.net/gh/cnyist/blog/ML/optimizer/numerical_analytical_gradient/numerical_analytical_gradient.png" alt="cover"><div class="content is-center"><div class="date"><i class="far fa-calendar-alt fa-fw"></i> 2020-05-24</div><div class="title">数值梯度与解析梯度</div></div></a></div><div><a href="/ML/optimizer/optimizer_all/" title="优化算法汇总"><img class="cover" src="https://cdn.jsdelivr.net/gh/cnyist/blog/ML/optimizer/optimizer_all/optimizer_all.png" alt="cover"><div class="content is-center"><div class="date"><i class="far fa-calendar-alt 
fa-fw"></i> 2020-03-14</div><div class="title">优化算法汇总</div></div></a></div><div><a href="/ML/RL/papers_read/RL_papers_word/" title="强化学习类论文常用表达"><img class="cover" src="https://cdn.jsdelivr.net/gh/cnyist/blog/ML/RL/papers_read/RL_papers_word/RL_papers_word.jpg" alt="cover"><div class="content is-center"><div class="date"><i class="far fa-calendar-alt fa-fw"></i> 2020-09-13</div><div class="title">强化学习类论文常用表达</div></div></a></div><div><a href="/ML/RL/primer/Intro_to_Policy_Optimization_code/" title="Intro to Policy Optimization 代码详解"><img class="cover" src="https://cdn.jsdelivr.net/gh/cnyist/blog/ML/RL/primer/Intro_to_Policy_Optimization_code/Intro_to_Policy_Optimization_code.png" alt="cover"><div class="content is-center"><div class="date"><i class="far fa-calendar-alt fa-fw"></i> 2020-06-25</div><div class="title">Intro to Policy Optimization 代码详解</div></div></a></div><div><a href="/ML/RL/primer/RL_demo/" title="强化学习入门 Demo"><img class="cover" src="https://cdn.jsdelivr.net/gh/cnyist/blog/ML/RL/primer/RL_demo/RL_demo.png" alt="cover"><div class="content is-center"><div class="date"><i class="far fa-calendar-alt fa-fw"></i> 2020-04-27</div><div class="title">强化学习入门 Demo</div></div></a></div></div></div><hr/><div id="post-comment"><div class="comment-head"><div class="comment-headline"><i class="fas fa-comments fa-fw"></i><span> 评论</span></div></div><div class="comment-wrap"><div><div id="twikoo-wrap"></div></div></div></div></div><div class="aside-content" id="aside-content"><div class="card-widget card-info"><div class="card-info-avatar is-center"><img class="avatar-img" src="https://cdn.jsdelivr.net/gh/cnyist/blog/img/avatar.svg" onerror="this.onerror=null;this.src='/img/friend_404.gif'" alt="avatar"/><div class="author-info__name">云玩家</div><div class="author-info__description">云玩家's blog</div></div><div class="card-info-data"><div class="card-info-data-item is-center"><a href="/archives/"><div class="headline">文章</div><div 
class="length-num">97</div></a></div><div class="card-info-data-item is-center"><a href="/tags/"><div class="headline">标签</div><div class="length-num">40</div></a></div><div class="card-info-data-item is-center"><a href="/categories/"><div class="headline">分类</div><div class="length-num">27</div></a></div></div><a class="button--animated" id="card-info-btn" target="_blank" rel="noopener" href="https://github.com/xxxxxx"><i class="fab fa-github"></i><span>Follow Me</span></a><div class="card-info-social-icons is-center"><a class="social-icon" href="https://github.com/cnyist" target="_blank" title="Github"><i class="fab fa-github"></i></a><a class="social-icon" href="https://blog.csdn.net/chnyist" target="_blank" title="CSDN"><i class="fas iconfont icon-CN_csdnnet"></i></a><a class="social-icon" href="https://www.zhihu.com/people/cnyist" target="_blank" title="知乎"><i class="fas iconfont icon-zhihu"></i></a><a class="social-icon" href="mailto:yunist@qq.com" target="_blank" title="Email"><i class="fas fa-envelope"></i></a></div></div><div class="card-widget card-announcement"><div class="item-headline"><i class="fas fa-bullhorn card-announcement-animation"></i><span>公告</span></div><div class="announcement_content">This is my Blog</div></div><div class="sticky_layout"><div class="card-widget" id="card-toc"><div class="item-headline"><i class="fas fa-stream"></i><span>目录</span></div><div class="toc-content"><ol class="toc"><li class="toc-item toc-level-1"><a class="toc-link" href="#%E5%87%86%E5%A4%87"><span class="toc-number">1.</span> <span class="toc-text">准备</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#%E4%BC%98%E5%8A%BF%E5%87%BD%E6%95%B0%E7%9A%84%E4%BC%B0%E8%AE%A1"><span class="toc-number">2.</span> <span class="toc-text">优势函数的估计</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#%E5%AF%B9%E5%9B%9E%E6%8A%A5%E5%8F%98%E5%BD%A2%E5%90%8E%E7%9A%84%E8%A7%A3%E9%87%8A"><span class="toc-number">3.</span> <span 
class="toc-text">对回报变形后的解释</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#%E5%90%8E%E8%AE%B0"><span class="toc-number">4.</span> <span class="toc-text">后记</span></a></li></ol></div></div><div class="card-widget card-recent-post"><div class="item-headline"><i class="fas fa-history"></i><span>最新文章</span></div><div class="aside-list"><!-- - let post_cover = article.cover--><div class="aside-list-item"><a class="thumbnail" href="/math/set_theory/unique_ordinal/" title="序数的唯一性"><img src="https://cdn.jsdelivr.net/gh/cnyist/blog/math/set_theory/unique_ordinal/unique_ordinal.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="序数的唯一性"/></a><div class="content"><a class="title" href="/math/set_theory/unique_ordinal/" title="序数的唯一性">序数的唯一性</a><time datetime="2022-01-12T08:26:12.000Z" title="发表于 2022-01-12 16:26:12">2022-01-12</time></div></div><!-- - let post_cover = article.cover--><div class="aside-list-item"><a class="thumbnail" href="/math/mathematical_analysis/mathematical_analysis_practice/3/3-1/" title="(史济怀) 数学分析教程上册第 3 版-练习题 3.1"><img src="https://cdn.jsdelivr.net/gh/cnyist/blog/math/mathematical_analysis/mathematical_analysis_practice/3/3-1/math.jpeg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="(史济怀) 数学分析教程上册第 3 版-练习题 3.1"/></a><div class="content"><a class="title" href="/math/mathematical_analysis/mathematical_analysis_practice/3/3-1/" title="(史济怀) 数学分析教程上册第 3 版-练习题 3.1">(史济怀) 数学分析教程上册第 3 版-练习题 3.1</a><time datetime="2021-10-19T00:30:55.000Z" title="发表于 2021-10-19 08:30:55">2021-10-19</time></div></div><!-- - let post_cover = article.cover--><div class="aside-list-item"><a class="thumbnail" href="/math/mathematical_analysis/mathematical_analysis_practice/2/2-11/" title="(史济怀) 数学分析教程上册第 3 版-练习题 2.11"><img src="https://cdn.jsdelivr.net/gh/cnyist/blog/math/mathematical_analysis/mathematical_analysis_practice/2/2-11/2-11.jpeg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="(史济怀) 数学分析教程上册第 3 版-练习题 
2.11"/></a><div class="content"><a class="title" href="/math/mathematical_analysis/mathematical_analysis_practice/2/2-11/" title="(史济怀) 数学分析教程上册第 3 版-练习题 2.11">(史济怀) 数学分析教程上册第 3 版-练习题 2.11</a><time datetime="2021-09-25T03:37:28.000Z" title="发表于 2021-09-25 11:37:28">2021-09-25</time></div></div><!-- - let post_cover = article.cover--><div class="aside-list-item"><a class="thumbnail" href="/math/mathematical_analysis/mathematical_analysis_practice/2/2-10/" title="(史济怀) 数学分析教程上册第 3 版-练习题 2.10"><img src="https://cdn.jsdelivr.net/gh/cnyist/blog/math/mathematical_analysis/mathematical_analysis_practice/2/2-10/2-10.jpeg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="(史济怀) 数学分析教程上册第 3 版-练习题 2.10"/></a><div class="content"><a class="title" href="/math/mathematical_analysis/mathematical_analysis_practice/2/2-10/" title="(史济怀) 数学分析教程上册第 3 版-练习题 2.10">(史济怀) 数学分析教程上册第 3 版-练习题 2.10</a><time datetime="2021-09-22T07:36:49.000Z" title="发表于 2021-09-22 15:36:49">2021-09-22</time></div></div><!-- - let post_cover = article.cover--><div class="aside-list-item"><a class="thumbnail" href="/math/mathematical_analysis/mathematical_analysis_practice/2/2-9/" title="(史济怀) 数学分析教程上册第 3 版-练习题 2.9"><img src="https://cdn.jsdelivr.net/gh/cnyist/blog/math/mathematical_analysis/mathematical_analysis_practice/2/2-9/2-9.jpeg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="(史济怀) 数学分析教程上册第 3 版-练习题 2.9"/></a><div class="content"><a class="title" href="/math/mathematical_analysis/mathematical_analysis_practice/2/2-9/" title="(史济怀) 数学分析教程上册第 3 版-练习题 2.9">(史济怀) 数学分析教程上册第 3 版-练习题 2.9</a><time datetime="2021-09-20T08:19:06.000Z" title="发表于 2021-09-20 16:19:06">2021-09-20</time></div></div></div></div></div></div></main><footer id="footer"><div id="footer-wrap"><div class="copyright">&copy;2020 - 2022 By 云玩家</div><div class="framework-info"><span>框架 </span><a target="_blank" rel="noopener" href="https://hexo.io">Hexo</a><span class="footer-separator">|</span><span>主题 </span><a 
target="_blank" rel="noopener" href="https://github.com/jerryc127/hexo-theme-butterfly">Butterfly</a></div></div></footer><script>document.getElementById('web_bg').style = 'background-image: url("' + get_rand_jsd_pic('cnyist', 'banner@master', '/', 70, '-min.jpg') + '")'</script></div><div id="rightside"><div id="rightside-config-hide"><button id="readmode" type="button" title="阅读模式"><i class="fas fa-book-open"></i></button><button id="darkmode" type="button" title="浅色和深色模式转换"><i class="fas fa-adjust"></i></button><button id="hide-aside-btn" type="button" title="单栏和双栏切换"><i class="fas fa-arrows-alt-h"></i></button></div><div id="rightside-config-show"><button id="rightside_config" type="button" title="设置"><i class="fas fa-cog fa-spin"></i></button><button class="close" id="mobile-toc-button" type="button" title="目录"><i class="fas fa-list-ul"></i></button><a id="to_comment" href="#post-comment" title="直达评论"><i class="fas fa-comments"></i></a><button id="go-up" type="button" title="回到顶部"><i class="fas fa-arrow-up"></i></button></div></div><div id="local-search"><div class="search-dialog"><div class="search-dialog__title" id="local-search-title">本地搜索</div><div id="local-input-panel"><div id="local-search-input"><div class="local-search-box"><input class="local-search-box--input" placeholder="搜索文章" type="text"/></div></div></div><hr/><div id="local-search-results"></div><span class="search-close-button"><i class="fas fa-times"></i></span></div><div id="search-mask"></div></div><div><script src="/js/utils.js"></script><script src="/js/main.js"></script><script src="/js/search/local-search.js"></script><div class="js-pjax"><script>if (!window.MathJax) {
  // Global MathJax configuration, assigned before the MathJax script is requested.
  window.MathJax = {
    loader: {
      // Resolve the '[tex]/amsCd' path to the '[tex]/amscd' component path.
      source: {
        '[tex]/amsCd': '[tex]/amscd'
      }
    },
    tex: {
      // Inline math may be delimited by $...$ or \(...\).
      inlineMath: [ ['$','$'], ["\\(","\\)"]],
      tags: 'ams'
    },
    options: {
      renderActions: {
        // Priority 10: turn every <script type="math/tex..."> node into a MathItem.
        // The script node is swapped for an empty text node that marks where the
        // math starts and ends; "; mode=display" in the type selects display mode.
        findScript: [10, doc => {
          for (const node of document.querySelectorAll('script[type^="math/tex"]')) {
            const display = !!node.type.match(/; *mode=display/)
            const math = new doc.options.MathItem(node.textContent, doc.inputJax[0], display)
            const text = document.createTextNode('')
            node.parentNode.replaceChild(text, node)
            math.start = {node: text, delim: '', n: 0}
            math.end = {node: text, delim: '', n: 0}
            doc.math.push(math)
          }
        }, ''],
        // Priority 200: for every mjx-container that is not display="true", add
        // 'mathjax-overflow' to its parent unless the parent already has 'has-jax'.
        addClass: [200,() => {
          // The selector previously lacked the ')' closing :not( and only worked
          // through the CSS parser's end-of-input recovery.
          document.querySelectorAll('mjx-container:not([display=\'true\'])').forEach( node => {
            const target = node.parentNode
            if (!target.classList.contains('has-jax')) {
              target.classList.add('mathjax-overflow')
            }
          })
        }, '', false]
      }
    }
  }
  
  // Request the MathJax bundle: build an async <script id="MathJax-script">
  // pointing at the CDN build and attach it to <head>.
  const mathjaxLoader = Object.assign(document.createElement('script'), {
    src: 'https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js',
    id: 'MathJax-script',
    async: true
  })
  document.head.appendChild(mathjaxLoader)
} else {
  // window.MathJax already exists, so re-typeset the page instead of loading
  // the bundle again.
  // Guard: until the bundle has finished loading, window.MathJax is still the
  // plain configuration object and has no `startup` member; dereferencing it
  // would throw. Nothing needs to be done in that case.
  if (MathJax.startup && MathJax.startup.document) {
    MathJax.startup.document.state(0)
    MathJax.texReset()
    // typesetPromise() rather than typeset(): the promise form can wait for
    // TeX extensions that have to be loaded dynamically.
    MathJax.typesetPromise().catch(err => console.error(err))
  }
}</script><script>(()=>{
  // Element that receives the comment count; null when the page has no
  // element with id "twikoo-count".
  const $countDom = document.getElementById('twikoo-count')
  // Mount the Twikoo comment widget into #twikoo-wrap.
  // The former `if (false) { ... }` merge of extra options could never run,
  // so the options are passed directly.
  const init = () => {
    twikoo.init({
      el: '#twikoo-wrap',
      envId: 'yunist-147dfe',
      region: ''
    })
  }

  // Query Twikoo for the comment count of the current path (replies excluded)
  // and write it into $countDom; a failed request is logged to the console.
  const getCount = () => {
    const query = {
      envId: 'yunist-147dfe',
      region: '',
      urls: [window.location.pathname],
      includeReply: false
    }

    twikoo.getCommentsCount(query)
      .then(counts => { $countDom.innerText = counts[0].count })
      .catch(error => { console.error(error) })
  }

  // Initialise Twikoo, fetching its script first when the `twikoo` global is
  // not available yet. When `bool` is true and a counter element exists, the
  // comment count is fetched on a zero-delay timer after initialisation.
  const loadTwikoo = (bool = false) => {
    const start = () => {
      init()
      if (bool && $countDom) setTimeout(getCount, 0)
    }

    if (typeof twikoo !== 'object') {
      getScript('https://cdn.jsdelivr.net/npm/twikoo/dist/twikoo.all.min.js').then(start)
      return
    }
    start()
  }

  // Load Twikoo right away and request the comment count.
  // The generated conditions here were constants (`'Twikoo' === 'Twikoo' || !false`
  // and `if (false)`), so this call was the only reachable path; the
  // btf.loadComment and window.loadOtherComment branches could never run.
  loadTwikoo(true)
})()</script></div><script async data-pjax src="//busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script></div></body></html>